import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd  
import matplotlib.pyplot as plt  
from decision_company import read_csv_file, create_subplots, get_fig_from_df, count_unique_values, set_layout, save_plot, show_plots

# Load the dataset from the directory passed as the first command-line argument
_data_path = os.path.join(sys.argv[1], 'credit_customers.csv')
credit_customers = read_csv_file(_data_path)

# One 2x2 figure: value-count bar charts on the top row, histograms on the bottom row
fig, axes = create_subplots(2, 2, figsize=(12, 8))

# Categorical columns are plotted from their value counts
for _column, _axis, _title in (
    ('credit_history', axes[0, 0], 'Credit History'),
    ('employment', axes[0, 1], 'Employment'),
):
    _value_counts = count_unique_values(credit_customers[_column])
    get_fig_from_df(_value_counts, kind='bar', ax=_axis, title=_title)

# Numeric columns are plotted directly as 20-bin histograms
for _column, _axis, _title in (
    ('age', axes[1, 0], 'Age'),
    ('credit_amount', axes[1, 1], 'Credit Amount'),
):
    get_fig_from_df(credit_customers[_column], kind='hist', bins=20, ax=_axis, title=_title)

set_layout()
save_plot('ref_result/subplots.png')
# show_plots()  # interactive display left disabled; the figure is only saved

import pandas as pd  
import matplotlib.pyplot as plt  
from decision_company import read_csv_file, get_fig_from_df, count_unique_values, set_layout, bind_dataframe, pivot_a_level, fill_missing_values, create_subplots, show_plots, save_plot, logical_and, filter_by_condition, make_bins

# Dataset: reuses the `credit_customers` frame loaded earlier in this script

def _approval_rates_by(frame, column):
    """Return (grouped frame, normalised class counts, 'good' share) for `column`.

    The three values are the intermediates of the same pipeline: group `frame`
    by `column`, count the 'class' values per group with normalize=True, pivot
    the result, replace missing cells with 0 and keep the 'good' column.
    """
    grouped = bind_dataframe(frame, column)
    class_shares = count_unique_values(grouped['class'], normalize=True)
    pivoted = fill_missing_values(pivot_a_level(class_shares), value=0)
    return grouped, class_shares, pivoted['good']


# Approval rates by credit history and by employment status
(credit_history_group,
 credit_history_counts,
 approval_rates_credit_history) = _approval_rates_by(credit_customers, 'credit_history')

(employment_group,
 employment_counts,
 approval_rates_employment) = _approval_rates_by(credit_customers, 'employment')

# Approval rates per age inside the middle-age segment.
# NOTE(review): the strict comparisons exclude ages 40 and 59 themselves,
# while the chart title below reads "(40-59)" - confirm which is intended.
age_column = credit_customers['age']
middle_age_condition = logical_and(age_column > 40, age_column < 59)
middle_age_customers = filter_by_condition(credit_customers, middle_age_condition)
(middle_age_group,
 middle_age_counts,
 approval_rates_middle_age) = _approval_rates_by(middle_age_customers, 'age')

# Approval rates per credit-amount bucket (bin edges every 2000, from 0 to 20000)
_amount_edges = [0, 2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000, 20000]
credit_customers['credit_amount_range'] = make_bins(
    credit_customers['credit_amount'], bins=_amount_edges, include_lowest=True
)
(credit_amount_group,
 credit_amount_counts,
 approval_rates_credit_amount) = _approval_rates_by(credit_customers, 'credit_amount_range')

# Draw the four rate series as bar charts on a 2x2 grid
fig, axes = create_subplots(2, 2, figsize=(12, 8))

for _rates, _axis, _title in (
    (approval_rates_credit_history, axes[0, 0], 'Approval Rates by Credit History'),
    (approval_rates_employment, axes[0, 1], 'Approval Rates by Employment Status'),
    (approval_rates_middle_age, axes[1, 0], 'Approval Rates for Middle-Age Group (40-59)'),
    (approval_rates_credit_amount, axes[1, 1], 'Approval Rates by Credit Amount Range'),
):
    get_fig_from_df(_rates, kind='bar', ax=_axis, title=_title)

set_layout()
save_plot('ref_result/bar_chart.png')
# show_plots()  # interactive display left disabled; the figure is only saved


import pandas as pd  
import matplotlib.pyplot as plt  
import seaborn as sns  
from decision_company import read_csv_file, create_subplots, df_copy, cast_to_a_dtype, categoricalIndex, categorical_codes, scatter_fig_instance, set_plot_split_title, set_layout, show_plots, save_plot, fetch_column

# Dataset: reuses the `credit_customers` frame loaded earlier in this script
  
# Binary target: 1 where 'class' equals 'good', 0 for every other value
credit_customers['class_binary'] = fetch_column(credit_customers, 'class').apply(
    lambda label: int(label == 'good')
)

# Work on a copy in which the two categorical features are replaced by their category codes
credit_customers_numerical = df_copy(credit_customers)
for _column in ('credit_history', 'employment'):
    _as_category = cast_to_a_dtype(fetch_column(credit_customers_numerical, _column), 'category')
    credit_customers_numerical[_column] = categorical_codes(categoricalIndex(_as_category))

# Scatter each feature against the binary target on a 2x2 grid
fig, axes = create_subplots(2, 2, figsize=(12, 8))

for _feature, _axis, _title in (
    ('credit_history', axes[0, 0], 'Loan Approval Rates vs. Credit History'),
    ('employment', axes[0, 1], 'Loan Approval Rates vs. Employment Status'),
    ('age', axes[1, 0], 'Loan Approval Rates vs. Age'),
    ('credit_amount', axes[1, 1], 'Loan Approval Rates vs. Credit Amount'),
):
    scatter_fig_instance(credit_customers_numerical, _feature, 'class_binary', _axis)
    set_plot_split_title(_axis, _title)

set_layout()
save_plot('ref_result/scatterplot.png')
# show_plots()  # interactive display left disabled; the figure is only saved


import pandas as pd  
import matplotlib.pyplot as plt  
from decision_company import read_csv_file, logical_and, check_elements_in_list, filter_by_condition, count_unique_values, create_subplots, set_plot_split_title, set_layout, show_plots, save_plot, fetch_column

# Dataset: reuses the `credit_customers` frame loaded earlier in this script
  
# --- Customer segments -------------------------------------------------------
# NOTE(review): the strict comparisons exclude ages 40 and 59 themselves,
# while the pie title below reads "(40-59)" - confirm which is intended.
_older_than_40 = fetch_column(credit_customers, 'age') > 40
_younger_than_59 = fetch_column(credit_customers, 'age') < 59
middle_aged_condition = logical_and(_older_than_40, _younger_than_59)
middle_aged_customers = filter_by_condition(credit_customers, middle_aged_condition)

_stable_employment_labels = ['>=7', '4<=X<7']
stable_employment_condition = check_elements_in_list(
    fetch_column(credit_customers, 'employment'), _stable_employment_labels
)
stable_employment_customers = filter_by_condition(credit_customers, stable_employment_condition)

_good_history_labels = ['existing paid', 'no credits/all paid', 'all paid']
good_credit_history_condition = check_elements_in_list(
    fetch_column(credit_customers, 'credit_history'), _good_history_labels
)
good_credit_history_customers = filter_by_condition(credit_customers, good_credit_history_condition)

# --- Normalised 'class' counts per segment -----------------------------------
approval_rates_middle_aged = count_unique_values(
    fetch_column(middle_aged_customers, 'class'), normalize=True
)
approval_rates_stable_employment = count_unique_values(
    fetch_column(stable_employment_customers, 'class'), normalize=True
)
approval_rates_good_credit_history = count_unique_values(
    fetch_column(good_credit_history_customers, 'class'), normalize=True
)

# --- One pie chart per segment, side by side ---------------------------------
fig, axes = create_subplots(1, 3, figsize=(18, 6))

# Keyword arguments shared by all three pies
_pie_options = dict(kind='pie', autopct='%.1f%%', startangle=90, ylabel='')

for _position, (_rates, _title) in enumerate((
    (approval_rates_middle_aged, 'Loan Approval Rates for Middle-Aged Customers (40-59)'),
    (approval_rates_stable_employment, 'Loan Approval Rates for Customers with Stable Employment'),
    (approval_rates_good_credit_history, 'Loan Approval Rates for Customers with Good/Excellent Credit History'),
)):
    _rates.plot(ax=axes[_position], **_pie_options)
    set_plot_split_title(axes[_position], _title)

set_layout()
save_plot('ref_result/pieplot.png')
# show_plots()  # interactive display left disabled; the figure is only saved
 



import pandas as pd  
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import LabelEncoder  
from sklearn.linear_model import LogisticRegression  
from sklearn.metrics import confusion_matrix, accuracy_score
import pickle  
from decision_company import read_csv_file, encoder_instance, encode_column, fetch_column, divide_dataset, create_LR_instance, classifier_training, classifier_predictions, calculate_conf_mat, calc_acc

# Dataset: reuses the `credit_customers` frame loaded earlier in this script
  
# Initialize a single label encoder; it is reused for every column below
le = encoder_instance()

# Replace the categorical features and the target with their encoded values, in place.
# The columns are processed in the same order as before: features first, target last.
for _column in ('credit_history', 'employment', 'class'):
    credit_customers[_column] = encode_column(le, fetch_column(credit_customers, _column))

# Select features and target
X = credit_customers[['credit_history', 'employment', 'age', 'credit_amount']]
y = fetch_column(credit_customers, 'class')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = divide_dataset(X, y, test_size=0.2, random_state=42)

# Initialize and train a logistic regression classifier
clf = create_LR_instance()
clf = classifier_training(clf, X_train, y_train)

# Make predictions on the testing set
y_pred = classifier_predictions(clf, X_test)

# Calculate the confusion matrix and accuracy
cm = calculate_conf_mat(y_test, y_pred)
acc = calc_acc(y_test, y_pred)

# Report each metric and persist it. The files are opened in `with` blocks so the
# handles are flushed and closed deterministically instead of being left to the
# garbage collector.
print("Confusion Matrix:\n", cm)
with open("./ref_result/Confusion_Matrix.pkl", "wb") as _cm_file:
    pickle.dump(cm, _cm_file)

print("Accuracy:", acc)
with open("./ref_result/Accuracy.pkl", "wb") as _acc_file:
    pickle.dump(acc, _acc_file)